This script is used for modeling the pH of a particular geochemical system
import pickle
import numpy as np
import pandas as pd
import os
import seaborn as sns
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error,mean_squared_error,r2_score
## The following are the ML models which can be used for training
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler,StandardScaler
import timeit
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
%matplotlib inline
import pandas as pd
import plotly.express as px
import seaborn as sns
sns.set(style="darkgrid")
sns.set_context('talk')
# Folder that holds the training-set files read by this notebook.
data_dir = r'E:\projects\MLChemicalR\uranium\20_training_sets\30_phreeqc'

# Keep only entries whose name ends in ".dat". A plain substring test
# ('dat' in name) would also accept unrelated entries such as "metadata.txt"
# or a "data" sub-folder.
datafiles = [fl for fl in os.listdir(data_dir) if fl.endswith('.dat')]

# Results go into an "output" sub-folder; exist_ok makes the call a no-op when
# the folder is already there, so no separate existence check is needed.
out_dir = os.path.join(data_dir, 'output')
os.makedirs(out_dir, exist_ok=True)

# Notebook display of the files that were found.
datafiles
['10_PMU_02_LHS_500000_54854_02_t_P.dat', '10_PMU_02_LHS_50000_54854_02_t_P.dat', '10_PMU_02_LHS_5000_54854_02_t_P.dat', '10_PMU_02_LHS_500_54854_02_t_P.dat']
# Load every file listed in `datafiles` into a DataFrame and collect the frames
# in `dataset_all`, keyed by the file name without its extension.
dataset_all = {}
for file in datafiles:
    InsFile = os.path.join(data_dir, file)
    data = pd.read_csv(InsFile, sep='\t')
    # Strip surrounding whitespace from the header names so columns can be
    # addressed by their plain name.
    data.columns = [col.strip() for col in data.columns]
    # Drop the last row and the last column.
    # NOTE(review): presumably a trailing delimiter yields an empty final
    # column; confirm why the final row is discarded as well.
    # .copy() detaches the result from the parsed frame so that columns added
    # later do not write into a slice of another DataFrame.
    data = data.iloc[:-1, :-1].copy()
    # splitext removes the extension whatever its length (file[:-4] assumed
    # exactly four characters).
    dataset_all[os.path.splitext(file)[0]] = data
# Select the "LHS_50000" data set and derive three helper columns on it.
data50K = dataset_all['10_PMU_02_LHS_50000_54854_02_t_P']

# diff = totBase - totAcid, computed row by row.
data50K['diff'] = data50K['totBase'].sub(data50K['totAcid'])

# GrouppH: 1 where pH < 7, 2 where pH == 7, 3 where pH > 7.
# Rows that satisfy none of the conditions receive np.select's default of 0.
ph_col = data50K['pH']
conditions = [ph_col.lt(7), ph_col.eq(7), ph_col.gt(7)]
values = [1, 2, 3]
data50K['GrouppH'] = np.select(conditions, values)

# GroupMeta: 1 where metaschoepite == 0, 2 where metaschoepite > 0
# (again 0 for rows matching neither condition).
meta_col = data50K['metaschoepite']
conditions = [meta_col.eq(0), meta_col.gt(0)]
values = [1, 2]
data50K['GroupMeta'] = np.select(conditions, values)

# Notebook display of the last rows, including the new columns.
data50K.tail()
| | mass_H2O | totU | totAcid | totBase | pH | U_aq | U_s | U_sc | U_ex | Kd_s | Kd_sc | Kd_ex | metaschoepite | diff | GrouppH | GroupMeta |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 49994 | 0.999894 | 9.095900e-05 | 1.000000e-09 | 1.871300e-02 | 12.1360 | 8.897400e-05 | 1.995000e-06 | 1.995000e-06 | 2.309200e-22 | 22.422 | 22.42200 | 2.595400e-15 | 0.0 | 0.018713 | 3 | 1 |
| 49995 | 1.000060 | 2.817900e-07 | 1.000000e-09 | 8.701300e-04 | 10.7810 | 1.221800e-08 | 2.695800e-07 | 2.695800e-07 | 1.414000e-21 | 22064.000 | 22064.00000 | 1.157400e-10 | 0.0 | 0.000870 | 3 | 1 |
| 49996 | 1.000020 | 4.389100e-05 | 1.000000e-09 | 5.106400e-03 | 11.5710 | 4.189600e-05 | 1.994800e-06 | 1.994800e-06 | 1.342100e-20 | 47.614 | 47.61400 | 3.203500e-13 | 0.0 | 0.005106 | 3 | 1 |
| 49997 | 0.999935 | 1.016900e-05 | 1.422900e-02 | 1.000000e-09 | 1.9613 | 9.511300e-06 | 6.582500e-07 | 2.240300e-09 | 6.560100e-07 | 69.208 | 0.23555 | 6.897200e+01 | 0.0 | -0.014229 | 1 | 1 |
| 49998 | 0.999928 | 5.609300e-09 | 1.493300e-02 | 1.000000e-09 | 1.9398 | 5.246100e-09 | 3.636500e-10 | 1.118100e-12 | 3.625300e-10 | 69.319 | 0.21313 | 6.910500e+01 | 0.0 | -0.014933 | 1 | 1 |
# Draw one interactive 3-D scatter per quantity of interest. Every figure uses
# 'diff' and 'totU' as the x/y axes, colours points by pH and picks the marker
# symbol from GroupMeta; only the z axis changes between figures.
z_columns = ('pH', 'U_s', 'U_aq', 'U_sc', 'U_ex', 'metaschoepite')
for z_name in z_columns:
    fig = px.scatter_3d(
        data50K,
        x='diff',
        y='totU',
        z=z_name,
        color='pH',
        symbol='GroupMeta',
    )
    fig.show()

# Notebook display of the column names now present in the frame.
data50K.columns
Index(['mass_H2O', 'totU', 'totAcid', 'totBase', 'pH', 'U_aq', 'U_s', 'U_sc',
'U_ex', 'Kd_s', 'Kd_sc', 'Kd_ex', 'metaschoepite', 'diff', 'GrouppH',
'GroupMeta'],
dtype='object')